* ---------------------------------------------------------------------------
* Load the product-level input file for the chosen specification, declare the
* product panel, and prepare the N and D indicator variables.
* NOTE(review): N and D look like "newly added" and "dropped" product flags
* (they feed adding_term and drop_term further down) -- confirm.
* ---------------------------------------------------------------------------
global spec "new_kl"
local spec "$spec"

use "$BS_fold/local1/inputs/pre_reg_data_ProdR_`spec'", clear

* One panel unit per (co_code1, product_name5) pair.
egen prod_id = group(co_code1 product_name5)
xtset prod_id year

* Missing flags are recoded to 0.
foreach flag in N D {
	replace `flag' = 0 if `flag' == .
}

* Complement indicators: no_D = 1 - D and no_N = 1 - N.
foreach flag in D N {
	generate no_`flag' = 1 - `flag'
}

* Remove rows whose shares are zero or missing.
* Open question carried over from the original author: should this be done earlier?
drop if shares == 0 | shares == .

* Number of rows in each complement group within a firm-year.
foreach g in no_D no_N {
	egen T_`g' = total(`g'), by(co_code1 year)
}

* Share raised to the power delta_hat, then normalised to sum to one within firm-year.
* NOTE(review): this reads a variable called share, whereas the filter earlier in
* the file uses shares. Under Stata's default variable abbreviation share may simply
* resolve to shares -- confirm which variable is intended.
generate delta_share_num = share^(delta_hat)
egen delta_share_denom = total(delta_share_num), by(co_code1 year)
generate delta_share = delta_share_num/delta_share_denom

* For each group g in {no_D, no_N, D, N}:
*   g_delta_share   = g * delta_share
*   T_g_delta_share = firm-year total of g_delta_share
*   g_shares        = g * delta_share rescaled by that total, set to 0 when the total is 0
foreach g in no_D no_N D N {
	generate `g'_delta_share = `g'*delta_share
	egen T_`g'_delta_share = total(`g'_delta_share), by(co_code1 year)
	generate `g'_shares = (`g'*delta_share)/T_`g'_delta_share
	replace `g'_shares = 0 if T_`g'_delta_share==0
}

* Building blocks used after the collapse.
generate H = delta_share_num*exp(h_j)
generate a_j = delta_hat*omega_j
generate full_H_cov = exp(h_j)*delta_share

*** Unweighted firm-year means of h_j, a_j and d_j over the no_D and no_N groups
foreach m in h_j a_j d_j {
	foreach g in no_D no_N {
		* Value masked by the group indicator, and its firm-year total.
		generate `m'_`g' = `m'*`g'
		egen T_`m'_`g' = total(`m'_`g'), by(co_code1 year)
		* Mean = total divided by the group count; 0 when the group is empty.
		generate UWM_`m'_`g' = T_`m'_`g'/T_`g'
		replace UWM_`m'_`g' = 0 if T_`g'==0
	}
}

* h_j measured as a deviation from each group's unweighted mean.
foreach g in no_D no_N {
	generate h_j_hat_`g' = h_j - UWM_h_j_`g'
}

* Share-weighted exp() terms; these are summed to the firm-year level by the collapse.
foreach g in no_D no_N {
	generate `g'_H_hat_cov = exp(h_j_hat_`g')*`g'_shares
}
foreach g in no_D no_N D N {
	generate `g'_H_cov = exp(h_j)*`g'_shares
}





* Dummy for firms that are multi-product in at least one year.
*   fobs  = number of rows with a non-missing year for the firm
*   fobsy = number of distinct firm-years
* More rows than firm-years means some year has more than one product.
egen fobs = count(year), by(co_code1)
egen tag = tag(co_code1 year)
egen fobsy = total(tag), by(co_code1)
generate multi_prod = fobs > fobsy

* Firms with one product per year that switch product over time are also
* flagged: count distinct product_name5 values among the remaining firms.
bysort co_code1: egen sw = nvals(product_name5) if multi_prod == 0
replace multi_prod = 1 if sw > 1 & sw < .
**

* ---------------------------------------------------------------------------
* Collapse to one row per firm-year: means of the firm-year constants, sums of
* the share-weighted terms, and the within firm-year standard deviation of the
* demeaned h_j.
* NOTE(review): both (sd) columns are computed over every product row of the
* firm-year, and each equals h_j minus a firm-year constant, so the two look
* identical by construction -- confirm this is intended rather than an sd
* restricted to the no_D and no_N rows.
* ---------------------------------------------------------------------------
collapse ///
	(mean) first_year last_year ///
	       UWM_d_j_no_D UWM_d_j_no_N UWM_a_j_no_N UWM_a_j_no_D UWM_h_j_no_D UWM_h_j_no_N ///
	       multi_prod ///
	(sum)  no_D no_N N D ///
	       H full_H_cov delta_share_num ///
	       no_D_H_cov no_N_H_cov D_H_cov N_H_cov no_D_H_hat_cov no_N_H_hat_cov ///
	       no_D_shares no_N_shares D_shares N_shares D_delta_share N_delta_share ///
	(sd)   h_j_hat_no_D h_j_hat_no_N ///
	, by(co_code1 year)

* A missing standard deviation (for example a single-row firm-year) becomes 0.
foreach g in no_D no_N {
	replace h_j_hat_`g' = 0 if h_j_hat_`g' == .
}

* Sanity check: both ways of counting products should agree.
generate total_prods_1 = no_D + D
generate total_prods_2 = no_N + N
generate test = total_prods_1 - total_prods_2

summarize test if first_year != 1 & last_year != 1
summarize total_prods_1 if last_year != 1
summarize total_prods_2 if first_year != 1

* Logs of the collapsed aggregates.
foreach agg in H no_D_H_cov no_N_H_cov D_H_cov N_H_cov delta_share_num full_H_cov no_D_H_hat_cov no_N_H_hat_cov {
	generate ln_`agg' = ln(`agg')
}

* Sanity check: ln_H should equal ln_full_H_cov + ln_delta_share_num.
generate ln_H_alt = ln_full_H_cov + ln_delta_share_num
generate tester = ln_H - ln_H_alt
summarize tester if first_year == 1 | last_year == 1, detail
summarize tester, detail

xtset co_code1 year

* Year-on-year change in ln_H and the pieces of its decomposition.
generate full_dif_TFPR = ln_H - L.ln_H
foreach c in d a h {
	generate dif_`c'_bar = UWM_`c'_j_no_N - L.UWM_`c'_j_no_D
}

generate dif_cov_stay = ln_no_N_H_hat_cov - L.ln_no_D_H_hat_cov
generate dif_var_adj = ln_delta_share_num - L.ln_delta_share_num

* Contribution of rows flagged N this year and of rows flagged D last year.
generate adding_term = ln(1 + N_delta_share*((N_H_cov - no_N_H_cov)/(no_N_H_cov)))
generate drop_term = -ln(1 + L.D_delta_share*((L.D_H_cov - L.no_D_H_cov)/(L.no_D_H_cov)))

* Rebuild the total change from its components and compare with the direct measure.
generate full_dif_TFPR_alt = dif_d_bar + dif_a_bar + dif_cov_stay + dif_var_adj + adding_term + drop_term
generate test2 = full_dif_TFPR - full_dif_TFPR_alt

* Original author's reading: about right -- some rounding error, but small
* relative to the orders of magnitude involved.
summarize test2, detail

* Change in the within firm-year dispersion of h_j.
generate within_var = h_j_hat_no_N - L.h_j_hat_no_D

keep co_code1 year full_dif_TFPR ///
	dif_d_bar dif_a_bar dif_h_bar dif_cov_stay dif_var_adj ///
	adding_term drop_term multi_prod within_var

save "$BS_fold/local1/inputs/add_drop_categories_ProdR_`spec'_xtra", replace
	
* ---------------------------------------------------------------------------
* Firm-year level data set: reload the product-level file, aggregate it, build
* tfpr, and attach the decomposition terms saved above (matched rows only).
* ---------------------------------------------------------------------------
use "$BS_fold/local1/inputs/pre_reg_data_ProdR_`spec'", clear

* Variables averaged within firm-year; sales_value is summed.
local firm_vars beta_* delta_* l k ln_IND ln_IVS cleaned_number_prods I_sq
local ipt_raw   IND_CHN IND_WLD LMI_CHN LMI_WLD LMI_WLD_EXCL_IND LMI_CHN_EXCL_IND

* The wIPT_ and rwIPT_ variables use the same suffixes as the raw series.
local ipt_w
local ipt_rw
foreach s of local ipt_raw {
	local ipt_w  `ipt_w' wIPT_`s'
	local ipt_rw `ipt_rw' rwIPT_`s'
}

collapse (sum) sales_value (mean) `firm_vars' `ipt_raw' `ipt_w' `ipt_rw', ///
	by(co_code1 year nic_08_2dig nic_08_4dig)

* Log sales net of the estimated input contributions.
generate r = ln(sales_value)
generate tfpr = r - beta_k_hat_r * k - beta_l_hat_r * l - beta_lk_hat_r* I_sq

* Keep only firm-years present in both files; no _merge variable is left behind.
merge 1:1 co_code1 year using "$BS_fold/local1/inputs/add_drop_categories_ProdR_`spec'_xtra", ///
	keep(match) nogenerate

save "$BS_fold/local1/inputs/decomposition_SS_ProdR_`spec'_xtra", replace
	
	
* ---------------------------------------------------------------------------
* Build first differences and fix the estimation sample shared by all
* regressions below.
* ---------------------------------------------------------------------------
use "$BS_fold/local1/inputs/decomposition_SS_ProdR_`spec'_xtra", clear

xtset co_code1 year, yearly
destring nic_08_2dig, generate(nic2)

* First differences of the regressors and of tfpr.
foreach x in ln_IND ln_IVS {
	generate diff_`x' = D.`x'
}
generate diff_tfpr = D.tfpr

* Log first differences of the selected wIPT and rwIPT series.
foreach var in wIPT_IND_CHN wIPT_LMI_CHN_EXCL_IND rwIPT_IND_CHN rwIPT_LMI_CHN_EXCL_IND {
	generate ln_`var' = ln(`var')
	generate diff_`var' = D.ln_`var'
}

* Drop rows where any of the core differences is missing.
drop if missing(diff_tfpr, diff_ln_IND, diff_ln_IVS)

** To get the final sample for all regs: run the richest specification once
** (with drop_term as outcome) and keep only its estimation sample.
ivreghdfe drop_term (diff_ln_IND diff_wIPT_IND_CHN = diff_ln_IVS diff_wIPT_LMI_CHN_EXCL_IND), ///
	a(nic2#year) first cluster(nic_08_4dig)
generate sample0 = e(sample)
keep if sample0 == 1


* ---------------------------------------------------------------------------
* Six IV regressions of within_var, stored as w_1 ... w_6.
*   w_1 to w_3: diff_ln_IND and diff_wIPT_IND_CHN are both instrumented
*               (instruments diff_ln_IVS and diff_wIPT_LMI_CHN_EXCL_IND).
*   w_4 to w_6: only diff_ln_IND is instrumented (by diff_ln_IVS);
*               diff_wIPT_LMI_CHN_EXCL_IND enters as an included regressor.
* Within each triple the absorbed effects are, in order:
*   year ; year and nic2 ; nic2#year.
* For every model m the locals N_m, F_OUT_m and F_IN_m are filled in.
* ---------------------------------------------------------------------------
local wv_rhs_1 "(diff_ln_IND diff_wIPT_IND_CHN = diff_ln_IVS diff_wIPT_LMI_CHN_EXCL_IND)"
local wv_rhs_2 "(diff_ln_IND = diff_ln_IVS) diff_wIPT_LMI_CHN_EXCL_IND"
local wv_fe_1 "year"
local wv_fe_2 "year nic2"
local wv_fe_3 "nic2#year"

local wv_m = 0
forvalues r = 1/2 {
	forvalues j = 1/3 {
		local ++wv_m

		ivreghdfe within_var `wv_rhs_`r'', a(`wv_fe_`j'') first cluster(nic_08_4dig)
		eststo w_`wv_m'
		local N_`wv_m' = e(N)

		* First-stage statistics: one column per endogenous regressor.
		* NOTE(review): row 8 is hard-coded; it is presumably the
		* Sanderson-Windmeijer F in the e(first) layout -- confirm by
		* running: mat list f
		matrix f = e(first)

		* string() with a fixed format keeps two decimals (12.30, not 12.3)
		* and avoids floating-point noise from storing round() in a macro.
		local F_OUT_`wv_m' = string(f[8,1], "%9.2f")

		* The second F-stat exists only when two regressors are instrumented;
		* otherwise the table cell is left empty.
		local F_IN_`wv_m'
		if colsof(f) >= 2 {
			local F_IN_`wv_m' = string(f[8,2], "%9.2f")
		}
	}
}
	
* Row labels for the exported table (used because esttab is called with the label option).
label variable diff_ln_IND "OutputComp"
label variable diff_wIPT_IND_CHN "InputAccess"
label variable diff_wIPT_LMI_CHN_EXCL_IND "InputAccessIV"

* Export the six stored models: coefficients with standard errors in parentheses,
* followed by two extra rows holding the first-stage F statistics.
esttab w_1 w_2 w_3 w_4 w_5 w_6 using "$tables/WithinVariance_Regs.tex" , ///
	replace label nodepvars nomtitle ///
	eqlabels(none) collabels(none) ///
	cells(b(nostar fmt(%9.3f)) se(par fmt(%9.3f))) ///
	keep(diff_ln_IND diff_wIPT_IND_CHN diff_wIPT_LMI_CHN_EXCL_IND) ///
	prefoot("\hline" "F-stat OutputComp & `F_OUT_1' & `F_OUT_2' & `F_OUT_3' & `F_OUT_4' & `F_OUT_5' & `F_OUT_6' \\" ///
	"F-Stat InputAccess & `F_IN_1' & `F_IN_2' &  `F_IN_3' &  `F_IN_4' &  `F_IN_5' &  `F_IN_6'  \\" )

				
				
